package olevod

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"github.com/gocolly/colly"
	"github.com/gogf/gf/v2/encoding/gjson"
	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/os/gfile"
	"github.com/gogf/gf/v2/os/gtime"
	"github.com/gogf/gf/v2/text/gregex"
	"github.com/gogf/gf/v2/text/gstr"
	"github.com/gogf/gf/v2/util/grand"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"uni-crawl-frame/core"
	"uni-crawl-frame/db/mysql/model/entity"
	"uni-crawl-frame/service/crawl/sysservice"
	"uni-crawl-frame/utils/browserutil"
	"uni-crawl-frame/utils/ffmpegutil"
	"uni-crawl-frame/utils/fileutil"
)

// Selectors and patterns used to scrape an Olevod play page.
//
//   - videoXpath selects the element with id "playerCnt"; OpenBrowser waits for
//     it to appear before reading anything from the page.
//   - programNameXpath selects the element whose text is logged as the
//     programme name.
//   - programItemXpath is a fmt format string: its %s is filled with the
//     concatenation of the id and nid values parsed from the seed URL, and the
//     text of the matching link is logged as the episode.
//   - jsonRegex captures the JSON object assigned to player_aaaa in the page
//     source; its "url" field is read as the m3u8 address. The match is
//     non-greedy and stops at the first '}', so it assumes the object contains
//     no nested braces (TODO confirm against real pages).
var (
	videoXpath       = "//*[@id='playerCnt']"
	programNameXpath = "//*[@class='title margin_0']"
	programItemXpath = "//*[@id='hl02']//a[@class='%s']"
	jsonRegex        = "player_aaaa=({.*?})"
)

// urlParams holds the pieces parsed out of an Olevod play-page URL by
// buildUrlParam.
//
//   - url is the page URL the parameters belong to (when populated).
//   - isSupport is meant to report whether the URL has the expected
//     ".../id/<n>/sid/1/nid/<n>.html" shape; FillTargetRequestBak aborts when it
//     is false.
//   - id is the first captured number (the value after "/id/").
//   - htmlId is the second captured number (the value after "/nid/").
type urlParams struct {
	url       string
	isSupport bool
	id        string
	htmlId    string
}

// OleVodCrawl is the Olevod crawler. It embeds *core.AbstractCrawlVodFlow and
// supplies the site-specific steps defined in this file: opening the play page,
// extracting the m3u8 URL, parsing the playlist and downloading its segments.
type OleVodCrawl struct {
	*core.AbstractCrawlVodFlow
}

// UseBrowser always returns true. Presumably the crawl framework uses it to
// decide whether to provide the browser session that OpenBrowser drives
// (verify against the framework's caller).
func (c *OleVodCrawl) UseBrowser() bool {
	return true
}

// UseBrowserMobProxy always returns false. Presumably this tells the crawl
// framework that no BrowserMob proxy is needed for this site (verify against
// the framework's caller).
func (c *OleVodCrawl) UseBrowserMobProxy() bool {
	return false
}

// OpenBrowser loads the seed page in the browser session held by ctx and
// extracts the episode's m3u8 URL from the player_aaaa JSON embedded in the
// page.
//
// On success ctx.CrawlQueueSeed.CrawlM3U8Url is set to the JSON's "url" field
// and then passed through validateAndResetM3U8Url. When the JSON cannot be
// found the seed is left unchanged and the condition is logged.
func (c *OleVodCrawl) OpenBrowser(ctx *core.ApplicationContext) {
	logCtx := gctx.GetInitCtx()
	seedUrl := ctx.CrawlQueueSeed.CrawlSeedUrl
	fmt.Printf("欧乐爬虫开始：%v,\r\n", ctx.CrawlQueueSeed)

	// Navigation and wait failures are reported but deliberately not fatal: a
	// page that loaded slowly or only partially may still contain the player
	// JSON, so scraping is attempted regardless.
	if err := ctx.Wd.Get(seedUrl); err != nil {
		g.Log().Warningf(logCtx, "open %s: %v", seedUrl, err)
	}
	if err := ctx.Wd.WaitWithTimeout(browserutil.GetXpathCondition(videoXpath), gtime.S*30); err != nil {
		g.Log().Warningf(logCtx, "wait for player element on %s: %v", seedUrl, err)
	}

	urlParam := c.buildUrlParam(seedUrl)
	episodeXpath := fmt.Sprintf(programItemXpath, urlParam.id+urlParam.htmlId)
	g.Log().Infof(logCtx, "剧名：%s", browserutil.GetTextByXpath(ctx.Wd, programNameXpath))
	g.Log().Infof(logCtx, "集数：%s", browserutil.GetTextByXpath(ctx.Wd, episodeXpath))

	// A successful match yields exactly two entries: the whole match and the
	// captured JSON object.
	rets := browserutil.GetTextByRegex(ctx.Wd, jsonRegex)
	if len(rets) != 2 {
		g.Log().Warningf(logCtx, "player_aaaa JSON not found on %s", seedUrl)
		return
	}

	m3u8Url := gjson.New(rets[1]).Get("url").String()
	g.Log().Infof(logCtx, "m3u8: %s", m3u8Url)
	ctx.CrawlQueueSeed.CrawlM3U8Url = m3u8Url

	validateAndResetM3U8Url(ctx)
}

// FillTargetRequest is a no-op for this crawler. In this file the m3u8 URL is
// already stored on the seed by OpenBrowser.
func (c *OleVodCrawl) FillTargetRequest(ctx *core.ApplicationContext) {
	// Empty implementation.
}

// buildUrlParam parses an Olevod play-page URL of the form
// ".../id/<digits>/sid/1/nid/<digits>.html".
//
// The returned value always carries the input url. When the URL matches,
// id and htmlId hold the two captured numbers and isSupport is true;
// otherwise id and htmlId are empty and isSupport is false.
func (c *OleVodCrawl) buildUrlParam(url string) *urlParams {
	up := &urlParams{url: url}

	// A match yields three entries: the whole match plus the two captures.
	rets, err := gregex.MatchString(`.*/id/(\d+)/sid/1/nid/(\d+).html`, url)
	if err != nil || len(rets) != 3 {
		return up
	}

	up.id = rets[1]
	up.htmlId = rets[2]
	// Mark the URL as supported; without this callers that check isSupport
	// would reject every URL, including well-formed ones.
	up.isSupport = true
	return up
}

// FillTargetRequestBak fetches the seed page over plain HTTP with colly
// (no browser) and extracts the m3u8 URL from the player_aaaa JSON in the
// response body. Judging by its name it is a backup of an earlier
// FillTargetRequest implementation (TODO confirm it is still needed).
//
// If the seed URL is not in the supported format, ErrorMsg is set on the seed
// and nothing is fetched. Otherwise, on success, CrawlM3U8Url is set on the
// seed and passed through validateAndResetM3U8Url.
func (c *OleVodCrawl) FillTargetRequestBak(ctx *core.ApplicationContext) {
	urlParam := c.buildUrlParam(ctx.CrawlQueueSeed.CrawlSeedUrl)
	if !urlParam.isSupport {
		ctx.CrawlQueueSeed.ErrorMsg = "不支持的Ole Url格式"
		return
	}
	ctx.Log.Line().Infof(gctx.GetInitCtx(), "id = %s, htmlId = %s", urlParam.id, urlParam.htmlId)

	coll := colly.NewCollector()

	// Log the programme name.
	coll.OnXML(programNameXpath, func(element *colly.XMLElement) {
		ctx.Log.Info(gctx.GetInitCtx(), "剧名: ", element.Text)
	})

	// Log the episode label of the link whose class is id+nid.
	coll.OnXML(fmt.Sprintf(programItemXpath, urlParam.id+urlParam.htmlId), func(element *colly.XMLElement) {
		ctx.Log.Info(gctx.GetInitCtx(), "集数: ", element.Text)
	})

	// Placeholder hook for per-request proxy/header customisation.
	coll.OnRequest(func(request *colly.Request) {
		//request.ProxyURL = "http://xlive:xlive@104.149.143.54:9990"
		//var Header map[string][]string=
		//request.Headers=Header
	})

	coll.OnResponse(func(response *colly.Response) {
		jsons, err := gregex.MatchString(jsonRegex, string(response.Body))
		if err != nil {
			ctx.Log.Error(gctx.GetInitCtx(), err)
			return
		}
		// Index 1 is the captured JSON object; it is absent when the page
		// does not contain player_aaaa, which must not panic the crawler.
		if len(jsons) < 2 {
			ctx.Log.Error(gctx.GetInitCtx(), "player_aaaa JSON not found in response")
			return
		}

		m3u8Url := gjson.New(jsons[1]).Get("url").String()
		ctx.Log.Info(gctx.GetInitCtx(), "m3u8: ", m3u8Url)
		ctx.CrawlQueueSeed.CrawlM3U8Url = m3u8Url
	})
	//coll.SetProxy("http://xlive:xlive@104.149.143.54:9990")

	if err := coll.Visit(ctx.CrawlQueueSeed.CrawlSeedUrl); err != nil {
		ctx.Log.Error(gctx.GetInitCtx(), err)
		return
	}

	validateAndResetM3U8Url(ctx)
}

// validateAndResetM3U8Url resolves a master playlist to its first variant
// playlist.
//
// When the seed's CrawlM3U8Url contains "master" (case-insensitive), the
// playlist is downloaded and the first URI line that mentions "m3u8" replaces
// CrawlM3U8Url. Absolute http(s) URIs are used as-is; anything else is
// appended to the master playlist's base URL. URLs without "master" are left
// untouched. Fetch errors are logged and leave the seed unchanged.
func validateAndResetM3U8Url(ctx *core.ApplicationContext) {
	masterUrl := ctx.CrawlQueueSeed.CrawlM3U8Url
	// Second-stage conversion is only needed for master playlists.
	if !gstr.ContainsI(masterUrl, "master") {
		return
	}

	baseUrl := getBaseUrl(masterUrl)
	coll := colly.NewCollector()
	coll.OnResponse(func(response *colly.Response) {
		scanner := bufio.NewScanner(bytes.NewBuffer(response.Body))
		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			// Skip blank lines and tag/comment lines: a tag such as
			// #EXT-X-MEDIA may carry URI="...m3u8" but is not itself a URI.
			if line == "" || strings.HasPrefix(line, "#") {
				continue
			}
			if !gstr.ContainsI(line, "m3u8") {
				continue
			}

			variantUrl := baseUrl + line
			lowered := strings.ToLower(line)
			if strings.HasPrefix(lowered, "http://") || strings.HasPrefix(lowered, "https://") {
				// Already absolute; prefixing the base would corrupt it.
				variantUrl = line
			}

			ctx.CrawlQueueSeed.CrawlM3U8Url = variantUrl
			ctx.Log.Info(gctx.GetInitCtx(), "最终m3u8: ", variantUrl)
			return
		}
		if err := scanner.Err(); err != nil {
			ctx.Log.Error(gctx.GetInitCtx(), err)
		}
	})

	if err := coll.Visit(masterUrl); err != nil {
		ctx.Log.Error(gctx.GetInitCtx(), err)
	}
}

// ConvertM3U8 reads the downloaded playlist at filePath and builds the
// ffmpegutil.M3u8DO describing it.
//
// FromUrl is the seed's page URL, FromBaseUrl is derived from the seed's m3u8
// URL, FromFile/FromDir locate the playlist on disk, and StreamLineList holds
// one entry per playlist line in file order. Each entry keeps the original
// text in both OriginLine and TransformedLine and is classified as:
//   - LineTypeXMap for '#' lines containing ffmpegutil.ExtMapLine,
//   - LineTypeKey for other '#' lines containing ffmpegutil.KeyLine,
//   - LineTypeComment for remaining '#' lines and for blank lines,
//   - LineTypeSrc for everything else (media segment URIs).
//
// It returns an error, and no M3u8DO, when the playlist file cannot be read.
func (c *OleVodCrawl) ConvertM3U8(seed *entity.CmsCrawlQueue, filePath string) (*ffmpegutil.M3u8DO, error) {
	log := g.Log().Line()
	baseUrl := getBaseUrl(seed.CrawlM3U8Url)
	log.Info(gctx.GetInitCtx(), "base url: ", baseUrl)

	m3u8DO := new(ffmpegutil.M3u8DO)
	m3u8DO.FromUrl = seed.CrawlSeedUrl
	m3u8DO.FromBaseUrl = baseUrl
	m3u8DO.FromFile = filePath
	m3u8DO.FromDir = gfile.Dir(filePath)

	err := gfile.ReadLines(filePath, func(line string) error {
		lineDO := ffmpegutil.StreamLineDO{
			LineType:        -1,
			SrcType:         -1,
			OriginLine:      line,
			TransformedLine: line,
		}

		switch {
		case strings.TrimSpace(line) == "":
			// Blank lines carry no URI; treating them as segments would make
			// the downloader request the bare base URL.
			lineDO.LineType = ffmpegutil.LineTypeComment
		case !gstr.HasPrefix(line, "#"):
			lineDO.LineType = ffmpegutil.LineTypeSrc
		case gstr.ContainsI(line, ffmpegutil.ExtMapLine):
			lineDO.LineType = ffmpegutil.LineTypeXMap
		case gstr.ContainsI(line, ffmpegutil.KeyLine):
			lineDO.LineType = ffmpegutil.LineTypeKey
		default:
			lineDO.LineType = ffmpegutil.LineTypeComment
		}

		m3u8DO.StreamLineList = append(m3u8DO.StreamLineList, lineDO)
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("reading m3u8 file %q: %w", filePath, err)
	}
	return m3u8DO, nil
}

// getBaseUrl returns m3u8Url up to and including the last "/" of its path,
// i.e. the prefix against which relative playlist entries are resolved.
//
// Any query string or fragment is ignored when locating that slash, so a "/"
// inside a signed token (for example "?sign=ab/cd") does not shift the result.
// It returns "" when the path part contains no "/".
func getBaseUrl(m3u8Url string) string {
	pathPart := m3u8Url
	if cut := strings.IndexAny(pathPart, "?#"); cut >= 0 {
		pathPart = pathPart[:cut]
	}
	// LastIndex returns -1 when there is no slash, which makes the slice empty.
	return pathPart[:strings.LastIndex(pathPart, "/")+1]
}

// DownLoadToMp4 downloads every media segment listed in m3u8DO into
// m3u8DO.FromDir and then hands the playlist to ffmpegutil.MergeTsFile.
//
// Steps: ffmpegutil.DiscardTsWhenDebug and ffmpegutil.DownloadDependenceFile
// are called first; then each LineTypeSrc entry is fetched from
// FromBaseUrl+OriginLine with at most 10 downloads in flight; finally the
// segments are merged and "*.m4s" temporary resources are deleted.
//
// It returns an error if the dependency download, any segment download, or the
// merge fails. After the first observed segment failure no further downloads
// are started, in-flight ones are awaited, and the merge is skipped.
func (c *OleVodCrawl) DownLoadToMp4(m3u8DO *ffmpegutil.M3u8DO) error {
	log := g.Log().Line()

	ffmpegutil.DiscardTsWhenDebug(m3u8DO)

	proxyUrl := sysservice.GetProxyByUrl(m3u8DO.FromUrl)
	log.Info(gctx.GetInitCtx(), "DownSaveTo: ", m3u8DO.FromDir)

	if err := ffmpegutil.DownloadDependenceFile(m3u8DO, proxyUrl); err != nil {
		return err
	}

	var (
		wg        sync.WaitGroup
		failCount int64
		// slots bounds the number of concurrent downloads.
		slots = make(chan struct{}, 10)
	)

	// Download the segments concurrently.
	for _, m3u8LineDO := range m3u8DO.StreamLineList {
		// Only segment lines need a download; tags and comments are skipped
		// without spending a goroutine or a slot on them.
		if m3u8LineDO.LineType != ffmpegutil.LineTypeSrc {
			continue
		}
		// Stop scheduling once something has failed. Break rather than return
		// so the goroutines already running are still awaited below.
		if atomic.LoadInt64(&failCount) > 0 {
			break
		}

		slots <- struct{}{}
		wg.Add(1)
		go func(lineDO ffmpegutil.StreamLineDO) {
			defer wg.Done()
			defer func() { <-slots }()

			// Wait a random 0-999 ms to avoid hammering the remote server
			// and to keep local CPU usage down.
			time.Sleep(time.Millisecond * time.Duration(grand.Intn(1000)))

			segmentPath := m3u8DO.FromDir + gfile.Separator + lineDO.OriginLine
			err := fileutil.DownloadFile(m3u8DO.FromBaseUrl+lineDO.OriginLine, proxyUrl, segmentPath, fileutil.Retry)
			if err != nil {
				atomic.AddInt64(&failCount, 1)
				log.Error(gctx.GetInitCtx(), "download segment failed: ", lineDO.OriginLine, " ", err)
			}
		}(m3u8LineDO)
	}
	wg.Wait()

	// Failures of the last scheduled segments are only visible here; merging
	// an incomplete set would produce a broken file.
	if atomic.LoadInt64(&failCount) > 0 {
		return errors.New("download media segments failed, merge skipped")
	}

	if err := ffmpegutil.MergeTsFile(m3u8DO); err != nil {
		return err
	}

	ffmpegutil.DeleteTmpResource(m3u8DO, "*.m4s")

	return nil
}
